import requests
from lxml import etree

# Fetch the page source of the 2021 ranking page.
url = "https://www.shanghairanking.cn/rankings/bcur/2021"
# A timeout keeps the script from hanging forever on a stalled connection.
resp = requests.get(url, timeout=10)
# Fail loudly on an HTTP error instead of silently parsing an error page.
resp.raise_for_status()
resp.encoding = 'utf-8'
rt = resp.text
# Extract the data.
et = etree.HTML(rt)
tbodys = et.xpath('//table[@class="rk-table"]/tbody')
print(tbodys)


def _first_text(row, path):
    """Return the stripped first text node matched by *path* under *row*, or "" if there is none."""
    found = row.xpath(path)
    return str(found[0]).strip() if found else ""


with open("../../../file_data/school.txt", "w", encoding='utf-8') as f:
    for tbody in tbodys:
        # Walk the table one row at a time so the fields of a record always
        # come from the same <tr>. Collecting each column as a separate list
        # and indexing them in lockstep misaligns records (or raises
        # IndexError) as soon as a single cell is missing.
        for row in tbody.xpath('./tr'):
            # School name
            name = _first_text(row, './td[2]/div/div[2]/div[1]/div/div/a/text()')
            if not name:
                # Rows without a name link produce no record.
                continue
            # School rank
            order = _first_text(row, './td[2]/div/div[2]/p/text()')
            # School location
            addr = _first_text(row, './td[3]/text()')
            # School type
            synthesis = _first_text(row, './td[4]/text()')
            # Overall score
            fraction = _first_text(row, './td[5]/text()')

            # The console line additionally drops inner spaces from the rank;
            # the file line keeps them.
            print(name, ",", order.replace(" ", ""), ",", addr, ",", synthesis, ",", fraction)
            f.write(f"{name},{order},{addr},{synthesis},{fraction}\n")
